# Start from a clean slate: remove every (non-hidden) object from the current
# environment before the script runs.
# NOTE(review): when this file is source()d into an interactive session this
# also deletes the user's own workspace objects -- confirm that is intended.
rm(list=ls())
# NOTE(review): readr is attached here, but the visible code only calls
# read.csv()/write.csv(); confirm whether readr is still needed.
library(readr)

########################### CHINA ############################

# Read the complete list of candidate dates for China (file has no header row)
dates_df_china <- read.csv(
  "../data/china_dates.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

# The dates are stored in the first column
all_dates_china <- dates_df_china[[1]]

# Fix the RNG seed so the same 20 dates are drawn on every run
set.seed(42)

# Draw 20 dates at random (without replacement)
random_dates_china <- sample(all_dates_china, size = 20)

# Show the draw in sorted order, then save that same sorted vector
sorted_dates_china <- sort(random_dates_china)
print(sorted_dates_china)
write.csv(sorted_dates_china, "../output/china_20_random_dates.csv")

###################### INDONESIA #################################

# Load the full dates sample in (the file has no header row)
dates_df_indonesia <- read.csv("../data/indonesia_dates.csv", header = FALSE, stringsAsFactors = FALSE)

# The dates are taken from the second column and parsed into Date objects,
# which is what allows the year to be extracted with format() further down
all_dates_indonesia <- as.Date(dates_df_indonesia[[2]])

set.seed(42)  # for reproducibility

# Randomly sample 20 dates
random_dates_indonesia <- sample(all_dates_indonesia, 20)

print(sort(random_dates_indonesia))

# we have no data for 3 out of the 20 resulting dates above (missing newspaper issues)
# so we drop those; they are identified by their year (1952 or 1953)
random_dates_indonesia_filtered <- random_dates_indonesia[!format(random_dates_indonesia, "%Y") %in% c("1952", "1953")]

# and we need to sample for another 3 random dates

# Exclude the 20 sampled dates and get remaining ones
# NOTE(review): only the 20 already-drawn dates are excluded here, so
# remaining_dates can still contain 1952/1953 dates -- confirm the
# replacement dates printed below have newspaper issues available.
remaining_dates <- all_dates_indonesia[!all_dates_indonesia %in% random_dates_indonesia]

# Sample 3 new dates from the remaining issues.
# The RNG is not re-seeded: this draw continues the stream started by
# set.seed(42) above, so it is reproducible only as long as the preceding
# sample() call stays unchanged.
new_dates3 <- sample(remaining_dates, 3)

# Print the 3 new dates
print(sort(new_dates3))

# Add them together and output.
# Uses random_dates_indonesia_filtered (defined above); the previously
# referenced name `random_dates_indo_filtered` was never defined, so this
# line stopped with "object not found" and no file was written.
write.csv(sort(c(random_dates_indonesia_filtered, new_dates3)), "../output/indonesia_20_random_dates.csv")

########################### VIETNAM #################################

# Read the complete list of candidate dates for Vietnam (file has no header row)
dates_df_vn <- read.csv(
  "../data/vietnam_dates.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)

# The dates are stored in the first column
all_dates_vn <- dates_df_vn[[1]]

# Fix the RNG seed (80 for this country) so the draw is reproducible,
# then pick 20 dates at random (without replacement)
set.seed(80)
random_dates_vn <- sample(all_dates_vn, size = 20)

# Show the draw in sorted order, then save that same sorted vector
sorted_dates_vn <- sort(random_dates_vn)
print(sorted_dates_vn)
write.csv(sorted_dates_vn, "../output/vietnam_20_random_dates.csv")


